## add 'developer' to packages to be installed from github
x <- c("devtools", "maRce10/warbleR", "readxl", "ranger", "caret", "e1071", "pbapply", "viridis", "ggplot2", "kableExtra", "rlang", "Sim.DiffProc", "soundgen", "markovchain", "igraph", "TraMineR", "spgs")
aa <- lapply(x, function(y) {
# get pakage name
pkg <- strsplit(y, "/")[[1]]
pkg <- pkg[length(pkg)]
# check if installed, if not then install
if (!pkg %in% installed.packages()[,"Package"]) {
if (grepl("/", y)) devtools::install_github(y, force = TRUE) else
install.packages(y)
}
# load package
try(require(pkg, character.only = T), silent = T)
})
warbleR_options(wl = 1100, parallel = parallel::detectCores() - 1, bp = "frange", fast = TRUE, threshold = 15, ovlp = 20)
opts_knit$set(root.dir = "..")
# set evaluation false
opts_chunk$set( fig.width = 6, fig.height = 3, eval = FALSE, warning = FALSE, message = FALSE, tidy = TRUE)
# number of trees in Random Forest models
num.trees <- 2000
# replicates in Random Forest replication
reps <- 50
# sensitivity cutoff
cutoff <- 0.86
# function to calculate classification random forest models with balanced sample sizes across categories
balanced_rf <- function(X, num.trees = 1000, random = FALSE, seed = 506){
# get smallest n across individuals
min.n <- min(table(X$indiv))
# use seed
set.seed(seed)
# randomly get rows for equal n across indivs
sel_rows <-
sapply(unique(X$indiv), function(x)
sample(rownames(X)[X$indiv == x], min.n, replace = FALSE))
# subset to those rows
X <- X[c(sel_rows), ]
# convert to factor
if (random){
# use seed
set.seed(seed)
X$indiv <- sample(X$indiv)
}
# make it a factor for ranger to work
X$indiv <- as.factor(X$indiv)
# run RF model spectral and cepstral parameters
rfm <-
ranger(
indiv ~ .,
data = X[, !names(X) %in% c("sound.files", "selec")],
num.trees = num.trees,
importance = "impurity",
probability = TRUE,
seed = seed
)
# get predicted individual from probs
pred_indiv <- apply(rfm$predictions, 1, function(x) colnames(rfm$predictions) [which.max(x)])
rfm$predictions <- data.frame(rfm$predictions, indiv = X$indiv, pred_indiv, sound.files = X$sound.files)
# remove X from start of names
names(rfm$predictions) <- gsub("^X", "", names(rfm$predictions))
return(rfm)
}
# function to calculate sensitivities at increasing RF class probabilities
sensitivity_fun <- function(X, parameters, thresholds = seq(0,1, by = 0.01)){
# get sensitivities for each group at very threshold
sensitiv_l <- lapply(X, function(x){
# extract prediction data.frame
Y <- x$aggregated_predictions
Y$max <- apply(Y[, sapply(Y, is.numeric)], 1, max)
# get sensitivity at different thresholds
sensi_l <- lapply(thresholds, function(y) data.frame(sensitivity = sum(Y$pred_indiv[Y$max >= y] == Y$actual_indiv[Y$max >= y])/ sum(Y$max >= y), n = sum(Y$max >= y) / nrow(Y)))
sensi <- do.call(rbind, sensi_l)
# add metadata
sensi$group <- x$group
sensi$n_indiv <- x$n_indiv
sensi$min_n <- x$min_n
sensi$n_calls <- nrow(Y) * sensi$n
return(sensi)
})
# put in a data frame
sensitivities <- as.data.frame(lapply(sensitiv_l, "[[", which(names(sensitiv_l[[1]]) == "sensitivity")))
# get minimum sensitivity at each probabilities
sensitivities$min.sensitivity <- apply(sensitivities, 1, min, na.rm = TRUE)
# get minimum sensitivity at each probabilities
sensitivities$mean.sensitivity <- apply(sensitivities, 1, mean, na.rm = TRUE)
# add thresholds to data frame
sensitivities$thresholds <- thresholds
# put in a data frame
sensitivities$n_calls <- rowSums(as.data.frame(lapply(sensitiv_l, "[[", which(names(sensitiv_l[[1]]) == "n_calls"))), na.rm = TRUE)
sensitivities$n_calls_prop <- sensitivities$n_calls / max(sensitivities$n_calls)
sensitivities <- sensitivities[!is.infinite(sensitivities$mean.sensitivity), ]
sensitivities$parameters <- parameters
return(sensitivities)
}
# read ext sel tab calls
clls <- readRDS("./output/CALLS_fixed_detections_inquiry_calls.RDS")
# read metadata
metadat <- as.data.frame(read_excel("./data/raw/Proyecto MPI enero 2020_3.xlsx",
sheet = "Experimento video coor vuelo"))
# nrow(metadat)
# remove video calibration metadat <- metadat[metadat$`tipo de video` !=
# 'calibracion de video', ]
metadat <- metadat[!is.na(metadat$Audio), ]
# get audio file name from sound files
clls$Audio <- sapply(clls$sound.files, function(x) {
# split by _
spl <- strsplit(x, split = "_")[[1]]
# take 3rd
spl <- spl[grep(".wav$", spl)]
# remove .wav
spl <- gsub(".wav", "", spl)
# make it anumber
spl <- as.numeric(spl)
return(spl)
})
# anyNA(clls$Audio)
# nrow(clls)
clls <- clls[clls$Audio %in% metadat$Audio, ]
# add metadata to ext sel table
clls2 <- merge(clls, metadat[!duplicated(metadat$Audio), c("Audio", "tipo de video",
"Experimento", "Grupo", "Individuo")])
clls <- fix_extended_selection_table(X = clls2, Y = clls)
# exclude those with obstacles
clls <- clls[clls$Experimento != "Búsqueda refugio con obstaculos", ]
clls$Individuo[clls$Experimento != "vuelo solo"] <- NA
# loop to measure acoustic parameters on each group
acous_param_l <- lapply(unique(clls$Grupo), function(x) {
print(x)
print(which(unique(clls$Grupo) == x)/length(unique(clls$Grupo)))
# get 1 group data
grp_test <- clls[clls$Grupo == x, ]
# keep only solo flights
grp_test <- grp_test[grp_test$Experimento != "vuelo grupal/enmascarando busqueda",
]
# remove low SNR calls
grp_test <- sig2noise(grp_test, mar = 0.025, pb = FALSE)
grp_test <- grp_test[grp_test$SNR > 1, ]
grp_test$SNR <- NULL
tb <- aggregate(Audio ~ Grupo + Experimento, data = grp_test, length)
# if group flight then go, otherwise return NA
if (any(grepl("grupal", tb$Experimento)) & length(unique(grp_test$Individuo[grp_test$Experimento ==
"vuelo solo"])) > 1) {
######## measure acoustic structure ###### measure spectrographic parameters
sp <- specan(grp_test, pb = FALSE, harmonicity = TRUE)
# remove time parameters
sp <- sp[, grep("time\\.", names(sp), invert = TRUE)]
# check if there are NAs anyNA(sp)
# measure cepstral coeffs
cc <- mfcc_stats(grp_test, pb = FALSE)[, -c(1, 2)]
# spectrographic cross correlation
spxc <- xcorr(grp_test, pb = FALSE)
# MDS
spxc <- cmdscale(1 - spxc, k = 10, list. = TRUE)
spxc_mds <- spxc$points
colnames(spxc_mds) <- paste0("spxcMDS", 1:ncol(spxc_mds))
# mfcc cross correlation
mfccxc <- xcorr(grp_test, pb = FALSE, type = "mfcc")
# MDS
mfccxc <- cmdscale(1 - mfccxc, k = 10, list. = TRUE)
mfxc_mds <- mfccxc$points
colnames(mfxc_mds) <- paste0("mfxcMDS", 1:ncol(mfxc_mds))
# dynamic time warping
dtw.dists <- df_DTW(grp_test, img = FALSE, pb = FALSE, threshold.time = 1)
# MDS
dtw_mds <- cmdscale(dtw.dists, k = 10, list. = TRUE)$points
# fix colnames
colnames(dtw_mds) <- paste0("dtwMDS", 1:ncol(dtw_mds))
# put parameters in a list
acous_param_l <- list(sp, cc, dtw_mds, spxc_mds, mfxc_mds)
# remove null ones
acous_param_l <- acous_param_l[!sapply(acous_param_l, is.null)]
# bind all acoustic structure parameters together
all_acous_param <- do.call(cbind, acous_param_l)
# scale
all_acous_param[, -c(1, 2)] <- scale(all_acous_param[, -c(1, 2)])
# add individual and experiment
all_acous_param$indiv <- grp_test$Individuo
all_acous_param$experiment <- grp_test$Experimento
# remove bottom and top freq
all_acous_param$top.freq <- all_acous_param$bottom.freq <- NULL
return(all_acous_param)
} else return(NULL)
})
names(acous_param_l) <- unique(clls$Grupo)
# remove those that did not have group flights
acous_param_l <- acous_param_l[!sapply(acous_param_l, is.null)]
# remove columns with NAs across all groups get names of columns that have any NA
# in at least 1 group (list element)
na_colnms <- unique(unlist(lapply(acous_param_l, function(x) names(x)[sapply(x, anyNA)])))
# keep indiv column
na_colnms <- na_colnms[na_colnms != "indiv"]
# remove NA columns for all groups
acous_param_l <- lapply(acous_param_l, function(x) x[, !names(x) %in% na_colnms])
# save as RDS
saveRDS(acous_param_l, "./data/processed/acoustic_parameters_all_groups_all_warbler_acoustic_measurements.RDS")
# read acoustic parameter data
acous_param_l <- readRDS("./data/processed/acoustic_parameters_all_groups_all_warbler_acoustic_measurements.RDS")
# minimum sample size per group
min_n <- sapply(acous_param_l, function(x) min(table(x$indiv)))
# remove grpoups with less than 10 observations for minimum sample size
acous_param_l <- acous_param_l[!names(acous_param_l) %in% names(min_n)[min_n < 10]]
# which parameters would be measured
param_categories <- c("mfcc", "spxc", "mfxc", "dtw", "sp")
# get actual parameter names
col_names <- names(acous_param_l[[1]])
col_names <- col_names[col_names != "experiment"]
# measurement category for each measruremnt
clss_col_names <- col_names
# name it by measurement function
for (i in 1:length(param_categories))
clss_col_names[if (param_categories[i] != "sp")
grepl(c("cc", "spxc", "mfxc", "dtw")[i], col_names) else
!grepl("cc|xc|dtw|indiv|sound.files|selec", col_names)] <- param_categories[i]
# all posible combinations of 2 and 3 parameters
combs4 <- combn(param_categories, 4)
combs3 <- combn(param_categories, 3)
combs2 <- combn(param_categories, 2)
# ake it a list
combs <- c(as.data.frame(combs4), as.data.frame(combs3), as.data.frame(combs2))
# add all 4 parameters as an option to the list
combs <- c(append(combs, list(param_categories)), param_categories)
for (i in 1:length(combs)) {
rds_name <- paste0("./data/processed/averaged_random_forest_models_from_solo_flights_", paste(combs[[i]], collapse = "-"), ".RDS")
# run if file doesn't exist
if (!file.exists(rds_name)){
# avg_mods <- list()
# loop over list of acoustic parameters
avg_mods <- pblapply(names(acous_param_l),
# cl = .Options$warbleR$parallel,
cl = 1,
function(x){
# for (x in names(acous_param_l)){
print(x)
# extract data
X <- acous_param_l[[which(names(acous_param_l) == x)]]
# only solo flight
solo_rf_input <- X[X$experiment == "vuelo solo", ]
# rename rows for sel_rows
rownames(solo_rf_input) <- 1:nrow(solo_rf_input)
# order by sound file column
solo_rf_input <- solo_rf_input[order(solo_rf_input$sound.files), ]
# remove experiment column
solo_rf_input$experiment <- NULL
# subset columns to keep only those from selected acoustic measurements
solo_rf_input <- solo_rf_input[ , col_names[clss_col_names %in% c(combs[[i]], "sound.files", "selec", "indiv")]]
# run random forest, set a seed to make it replicable
rf_results <- pblapply(1:reps, function(x) balanced_rf(X = solo_rf_input, num.trees = num.trees, seed = x), cl = 1)
# merge together predictions by sound files
rf_preds <- lapply(rf_results, function(x){
mrg <- merge(data.frame(sound.files = solo_rf_input$sound.files), x$predictions[, grep("indiv$", names(x$predictions), invert = TRUE)], all.x = TRUE)
mrg <- mrg[order(mrg$sound.files), -1]
}
)
# add column (individual) if not found
rf_preds <- lapply(rf_preds, function(x){
if(ncol(x) < length(unique(solo_rf_input$indiv))){
# how many columns are missing
mssng <- length(unique(solo_rf_input$indiv)) - ncol(x)
# add missing columns
for(i in 1:(mssng)) x <- data.frame(x, NA, check.names = FALSE)
names(x)[(ncol(x) - mssng + 1):ncol(x)] <- setdiff(unique(solo_rf_input$indiv), names(x))
}
return(x)
})
# get together predictions from the same individual
preds_by_indv <- lapply(1:ncol(rf_preds[[1]]), function(y)
do.call(cbind, lapply(rf_preds, "[", y))
)
agg_preds <- as.data.frame(lapply(preds_by_indv, rowMeans, na.rm = TRUE))
# add individual name to columns
names(agg_preds) <- names(rf_preds[[1]])
# add sound file column
agg_preds$sound.files <- solo_rf_input$sound.files
# get predicted indiv from aggregated probabilities
agg_preds$pred_indiv <- apply(agg_preds[, sapply(agg_preds, is.numeric)], 1, function(x) colnames(agg_preds)[which.max(x)])
# make it a factor
pred_indiv <- factor(agg_preds$pred_indiv, levels = unique(solo_rf_input$indiv))
agg_preds$actual_indiv <- actual_indiv <- factor(solo_rf_input$indiv, levels = unique(solo_rf_input$indiv))
# get confusion matrix
cm_solo <- confusionMatrix(pred_indiv, reference = actual_indiv)
### NULL MODEL
# run null model by randomizing indiv labels
rf_null_results <- pblapply(1:reps, function(x) balanced_rf(X = solo_rf_input, num.trees = num.trees, random = TRUE, seed = x), cl = 1)
# get accuracies form null models
# merge together predictions by sound files
rf_null_preds <- lapply(rf_null_results, function(x){
mrg <- merge(data.frame(sound.files = solo_rf_input$sound.files), x$predictions[, grep("indiv$", names(x$predictions), invert = TRUE)], all.x = TRUE)
mrg <- mrg[order(mrg$sound.files), -1]
}
)
# add column (individual) if not found
rf_null_preds <- lapply(rf_null_preds, function(x){
if(ncol(x) < length(unique(solo_rf_input$indiv))){
# how many columns are missing
mssng <- length(unique(solo_rf_input$indiv)) - ncol(x)
# add missing columns
for(i in 1:(mssng)) x <- data.frame(x, NA, check.names = FALSE)
names(x)[(ncol(x) - mssng + 1):ncol(x)] <- setdiff(unique(solo_rf_input$indiv), names(x))
}
return(x)
})
# get together predictions from the same individual
preds_by_indv_null <- lapply(1:ncol(rf_null_preds[[1]]), function(y)
do.call(cbind, lapply(rf_null_preds, "[", y))
)
agg_preds_null <- as.data.frame(lapply(preds_by_indv_null, rowMeans, na.rm = TRUE))
# add individual name to columns
names(agg_preds_null) <- names(rf_null_preds[[1]])
# add sound file column
agg_preds_null$sound.files <- solo_rf_input$sound.files
# get predicted indiv from aggregated probabilities
agg_preds_null$pred_indiv <- apply(agg_preds_null[, sapply(agg_preds_null, is.numeric)], 1, function(x) colnames(agg_preds_null)[which.max(x)])
# make it a factor
pred_indiv_null <- factor(agg_preds_null$pred_indiv, levels = unique(solo_rf_input$indiv))
actual_indiv <- factor(solo_rf_input$indiv, levels = unique(solo_rf_input$indiv))
# get confusion matrix
cm_solo_null <- confusionMatrix(pred_indiv_null, reference = actual_indiv)
### NOTE: ranger() OOB prediction error and confusionMatrix() Accuracy are the same
# put all results together
output <- list(group = x, accuracy = cm_solo$overall[1], null_accuracy = cm_solo_null$overall[1], aggregated_predictions = agg_preds, conf_matrix = cm_solo, random_forests = rf_results, n_indiv = length(unique(solo_rf_input$indiv)), min_n = min(table(solo_rf_input$indiv)), parameters = combs[[i]])
# avg_mods[[length(avg_mods) + 1]] <- output
# rm(output)
return(output)
}
)
# add group name to list
names(avg_mods) <- names(acous_param_l)
# save as RDS
saveRDS(avg_mods, rds_name)
}
}
# read rf outputs
rds_rf_outputs <- list.files(path = "./data/processed", pattern = "averaged_random_forest_models",
full.names = TRUE)
model_diagnostics_l <- pblapply(rds_rf_outputs, cl = 1, function(x) {
# read data
avg_mods <- readRDS(x)
sensitivities <- sensitivity_fun(X = avg_mods, parameters = paste(avg_mods[[1]]$parameters,
collapse = "-"))
# calculate threshold at cutoff
thresh_prob <- min(sensitivities$thresholds[sensitivities$mean.sensitivity >=
cutoff])
avg_accuracy <- sapply(avg_mods, "[[", which(names(avg_mods[[1]]) == "accuracy"))
null_accuracy <- sapply(avg_mods, "[[", which(names(avg_mods[[1]]) == "null_accuracy"))
filtered_accuracy <- unlist(sensitivities[sensitivities$thresholds == thresh_prob,
1:length(null_accuracy)])
diagnostics <- data.frame(group = gsub("X", "", names(filtered_accuracy)), model = rep(c("real data",
"null model", "filtered model"), each = length(avg_accuracy)), accuracy = c(avg_accuracy,
null_accuracy, filtered_accuracy), acoustic_parametes = gsub("./data/processed/averaged_random_forest_models_from_solo_flights_|.RDS",
"", x))
diagnostics$model <- factor(diagnostics$model, levels = c("null model", "filtered model",
"real data"))
return(list(diagnostics = diagnostics, sensitivities = sensitivities))
})
# put in a data frame
model_diagnostics <- do.call(rbind, lapply(model_diagnostics_l, "[[", 1))
saveRDS(model_diagnostics, "./data/processed/random_forests_diagnostics_several_acoustic_parameter_combinations_solo_flight.RDS")
# save sensitivities
senstivities_l <- lapply(model_diagnostics_l, "[[", 2)
# determine all column names in all selection tables
cnms <- unique(unlist(lapply(senstivities_l, names)))
# add columns that are missing to each selection table
senstivities_l <- lapply(senstivities_l, function(X) {
nms <- names(X)
if (length(nms) != length(cnms))
for (i in cnms[!cnms %in% nms]) {
X <- data.frame(X, NA, stringsAsFactors = FALSE, check.names = FALSE)
names(X)[ncol(X)] <- i
}
return(X)
})
model_sensitivities <- do.call(rbind, senstivities_l)
saveRDS(model_sensitivities, "./data/processed/random_forests_sensitivities_several_acoustic_parameter_combinations_solo_flight.RDS")
# read diagnostic
model_diagnostics <- readRDS("./data/processed/random_forests_diagnostics_several_acoustic_parameter_combinations_solo_flight.RDS")
# make factor to order plot x axis ticks
model_diagnostics$acoustic_parametes <- factor(model_diagnostics$acoustic_parametes,
levels = sort(unique(model_diagnostics$acoustic_parametes)))
# make it numeric for points/lines
model_diagnostics$model <- factor(model_diagnostics$model)
model_diagnostics$model_num <- as.numeric(model_diagnostics$model)
model_diagnostics$model_num[model_diagnostics$model != "null model"] <- 2
# density plots
ggplot(model_diagnostics[model_diagnostics$model != "filtered model", ], aes(x = accuracy,
fill = model)) + geom_density(alpha = 0.4) + theme_classic() + scale_fill_viridis_d() +
ggtitle("Original (non-filtered) accuracies") + labs(x = "Mean accuracy", y = "Frequency") +
facet_wrap(~acoustic_parametes, ncol = 4)
ggplot(model_diagnostics[model_diagnostics$model != "real data", ], aes(x = accuracy,
fill = model)) + geom_density(alpha = 0.4) + theme_classic() + scale_fill_viridis_d() +
ggtitle("Optimized accuracies") + labs(x = "Mean accuracy", y = "Frequency") +
facet_wrap(~acoustic_parametes, ncol = 4)
#
sensitivities <- readRDS("./data/processed/random_forests_sensitivities_several_acoustic_parameter_combinations_solo_flight.RDS")
# Probability threshold vs Mean sensitivty ggplot(data = sensitivities, aes(x =
# thresholds, y = mean.sensitivity)) + geom_hline(yintercept = cutoff, col =
# magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + # geom_vline(xintercept =
# thresh_prob, col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) +
# geom_point(col = magma(10, alpha = 0.7)[2]) + geom_line(col = magma(10, alpha =
# 0.7)[2]) + labs(y = 'Mean sensitivty', x = 'Probability threshold') +
# theme_classic() + # annotate(geom = 'text', x = thresh_prob + 0.04, y = 0.9,
# label = round(thresh_prob, 2)) + facet_wrap(~ parameters, ncol = 4) # calculate
# threshold at n_call_cutoff <-
# max(sensitivities$n_calls_prop[sensitivities$mean.sensitivity >= cutoff])
# # Probability threshold vs Proportion of calls used ggplot(data =
# sensitivities, aes(x = thresholds, y = n_calls_prop)) + geom_hline(yintercept =
# n_call_cutoff, col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + #
# geom_vline(xintercept = thresh_prob, col = magma(10, alpha = 0.7)[8], size =
# 1.6, lty = 3) + geom_point(col = magma(10, alpha = 0.7)[2]) + geom_line(col =
# magma(10, alpha = 0.7)[2]) + labs(y = 'Proportion of calls used', x =
# 'Probability threshold') + theme_classic() + # annotate(geom = 'text', x =
# n_call_cutoff + 0.04, y = 0.9, label = round(n_call_cutoff, 2)) facet_wrap(~
# parameters, ncol = 4) ggplot(data = sensitivities, aes(x = mean.sensitivity, y
# = n_calls_prop)) + geom_hline(yintercept = n_call_cutoff, col = magma(10, alpha
# = 0.7)[8], size = 1.6, lty = 3) + geom_vline(xintercept = cutoff, col =
# magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10,
# alpha = 0.7)[2]) + geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y =
# 'Proportion of calls used', x = 'Sensitivity') + theme_classic() + #
# annotate(geom = 'text', y = n_call_cutoff + 0.04, x = cutoff, label =
# round(n_call_cutoff, 2)) facet_wrap(~ parameters, ncol = 4)
maxs <- sapply(split(sensitivities, sensitivities$parameters), function(Z) max(Z$n_calls_prop[Z$mean.sensitivity >=
cutoff]))
max_prop_calls <- data.frame(data_set = names(maxs), max_prop_calls = maxs)
max_prop_calls <- max_prop_calls[order(-max_prop_calls$max_prop_calls), ]
df1 <- knitr::kable(max_prop_calls, row.names = FALSE, escape = FALSE, format = "html",
digits = 2)
kable_styling(df1, bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = FALSE, font_size = 18)
| data_set | max_prop_calls |
|---|---|
| mfcc-spxc-mfxc-sp | 0.98 |
| mfcc-spxc-mfxc-dtw-sp | 0.97 |
| mfcc-mfxc-dtw-sp | 0.96 |
| mfcc-mfxc-sp | 0.95 |
| mfcc-spxc-sp | 0.94 |
| mfcc-spxc-dtw-sp | 0.93 |
| mfcc-spxc-mfxc-dtw | 0.93 |
| mfcc-sp | 0.92 |
| mfcc-dtw-sp | 0.92 |
| mfcc-spxc-mfxc | 0.92 |
| mfcc-mfxc | 0.91 |
| mfcc-mfxc-dtw | 0.91 |
| spxc-mfxc | 0.91 |
| spxc-mfxc-dtw | 0.89 |
| spxc-mfxc-dtw-sp | 0.88 |
| spxc-mfxc-sp | 0.87 |
| mfcc-spxc-dtw | 0.86 |
| mfcc-spxc | 0.86 |
| spxc-sp | 0.83 |
| spxc-dtw-sp | 0.83 |
| mfcc | 0.83 |
| mfxc-dtw-sp | 0.82 |
| mfcc-dtw | 0.81 |
| sp-mfcc-dtw | 0.79 |
| spxc | 0.79 |
| mfxc-sp | 0.78 |
| spxc-dtw | 0.73 |
| dtw-sp | 0.71 |
| mfxc | 0.71 |
| sp | 0.69 |
| mfxc-dtw | 0.68 |
| dtw | 0.01 |
# read rf outputs
avg_mods <- readRDS(paste0("./data/processed/averaged_random_forest_models_from_solo_flights_",
max_prop_calls$data_set[1], ".RDS"))
sensitivities <- sensitivity_fun(X = avg_mods, parameters = "mfcc-sp")
# calculate threshold at cutoff
thresh_prob <- min(sensitivities$thresholds[sensitivities$mean.sensitivity >= cutoff])
avg_accuracy <- sapply(avg_mods, "[[", which(names(avg_mods[[1]]) == "accuracy"))
null_accuracy <- sapply(avg_mods, "[[", which(names(avg_mods[[1]]) == "null_accuracy"))
filtered_accuracy <- unlist(sensitivities[sensitivities$thresholds == thresh_prob,
1:length(null_accuracy)])
df <- data.frame(group = gsub("X", "", names(filtered_accuracy)), model = rep(c("real data",
"null model", "filtered model"), each = length(avg_accuracy)), accuracy = c(avg_accuracy,
null_accuracy, filtered_accuracy))
df$model <- factor(df$model, levels = c("null model", "filtered model", "real data"))
# violin plots
ggplot(df[df$model != "filtered model", ], aes(y = accuracy, x = model, fill = model)) +
geom_violin() + theme_classic() + scale_fill_viridis_d(alpha = 0.4) + ggtitle("Original accuracies") +
labs(y = "Mean accuracy", x = "Model") + geom_point(aes(x = rep(c(2, 1), each = nrow(df)/3)),
size = 5, show.legend = FALSE, col = "gray43", alpha = 0.8) + geom_line(aes(x = rep(c(2,
1), each = nrow(df)/3), group = group), col = "gray43", alpha = 0.8)
ggplot(df[df$model != "real data", ], aes(y = accuracy, x = model, fill = model)) +
geom_violin() + theme_classic() + scale_fill_viridis_d(alpha = 0.4) + ggtitle("Optimized accuracies") +
labs(y = "Mean accuracy", x = "Model") + geom_point(aes(x = rep(c(1, 2), each = nrow(df)/3)),
size = 3, show.legend = FALSE, col = "gray43", alpha = 0.8) + geom_line(aes(x = rep(c(1,
2), each = nrow(df)/3), group = group), col = "gray43", alpha = 0.8)
# density plots
ggplot(df[df$model != "filtered model", ], aes(x = accuracy, fill = model)) + geom_density(alpha = 0.4) +
theme_classic() + scale_fill_viridis_d() + ggtitle("Original (non-filtered) accuracies") +
labs(x = "Mean accuracy", y = "Frequency")
ggplot(df[df$model != "real data", ], aes(x = accuracy, fill = model)) + geom_density(alpha = 0.4) +
theme_classic() + scale_fill_viridis_d() + ggtitle("Optimized accuracies") +
labs(x = "Mean accuracy", y = "Frequency")
# Probability threshold vs Mean sensitivty
ggplot(data = sensitivities, aes(x = thresholds, y = mean.sensitivity)) + geom_hline(yintercept = cutoff,
col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_vline(xintercept = thresh_prob,
col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10,
alpha = 0.7)[2]) + geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y = "Mean sensitivty",
x = "Probability threshold") + theme_classic() + annotate(geom = "text", x = thresh_prob +
0.04, y = 0.9, label = round(thresh_prob, 2))
# calculate threshold at
n_call_cutoff <- max(sensitivities$n_calls_prop[sensitivities$mean.sensitivity >=
cutoff])
# Probability threshold vs Proportion of calls used
ggplot(data = sensitivities, aes(x = thresholds, y = n_calls_prop)) + geom_hline(yintercept = n_call_cutoff,
col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_vline(xintercept = thresh_prob,
col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10,
alpha = 0.7)[2]) + geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y = "Proportion of calls used",
x = "Probability threshold") + theme_classic() + annotate(geom = "text", x = n_call_cutoff +
0.04, y = 0.9, label = round(n_call_cutoff, 2))
ggplot(data = sensitivities, aes(x = mean.sensitivity, y = n_calls_prop)) + geom_hline(yintercept = n_call_cutoff,
col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_vline(xintercept = cutoff,
col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10,
alpha = 0.7)[2]) + geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y = "Proportion of calls used",
x = "Sensitivity") + theme_classic() + annotate(geom = "text", y = n_call_cutoff +
0.04, x = cutoff, label = round(n_call_cutoff, 2))
# # remove group 42 AND 6 avg_mods <- avg_mods[!names(avg_mods) %in% c('42', '6',
# '21', '29')] # calculate sensitivities sensitivities <- sensitivity_fun(X =
# avg_mods, parameters = max_prop_calls$data_set[1]) # calculate threshold at
# cutoff thresh_prob <-
# min(sensitivities$thresholds[sensitivities$mean.sensitivity >= cutoff])
# avg_accuracy <- sapply(avg_mods, '[[', which(names(avg_mods[[1]]) ==
# 'accuracy')) null_accuracy <- sapply(avg_mods, '[[', which(names(avg_mods[[1]])
# == 'null_accuracy')) filtered_accuracy <-
# unlist(sensitivities[sensitivities$thresholds == thresh_prob,
# 1:length(null_accuracy)]) df <- data.frame(group = gsub('X', '',
# names(filtered_accuracy)), model = rep(c('real data', 'null model', 'filtered
# model'), each = length(avg_accuracy)), accuracy = c(avg_accuracy,
# null_accuracy, filtered_accuracy)) df$model <- factor(df$model, levels =
# c('null model', 'filtered model', 'real data')) # violin plots
# ggplot(df[df$model != 'filtered model', ], aes(y = accuracy, x = model, fill =
# model)) + geom_violin() + theme_classic() + scale_fill_viridis_d(alpha = 0.4) +
# ggtitle('Original accuracies') + labs(y = 'Mean accuracy', x = 'Model') +
# geom_point(aes(x = rep(c(2, 1), each = nrow(df)/3)), size = 5, show.legend =
# FALSE, col = 'gray43', alpha = 0.8) + geom_line(aes(x = rep(c(2, 1), each =
# nrow(df)/3), group = group), col = 'gray43', alpha = 0.8) ggplot(df[df$model !=
# 'real data', ], aes(y = accuracy, x = model, fill = model)) + geom_violin() +
# theme_classic() + scale_fill_viridis_d(alpha = 0.4) + ggtitle('Optimized
# accuracies') + labs(y = 'Mean accuracy', x = 'Model') + geom_point(aes(x =
# rep(c(1, 2), each = nrow(df)/3)), size = 3, show.legend = FALSE, col =
# 'gray43', alpha = 0.8) + geom_line(aes(x = rep(c(1, 2), each = nrow(df)/3),
# group = group), col = 'gray43', alpha = 0.8) # density plots ggplot(df[df$model
# != 'filtered model', ], aes(x = accuracy, fill = model)) + geom_density(alpha =
# 0.4) + theme_classic() + scale_fill_viridis_d() + ggtitle('Original
# (non-filtered) accuracies') + labs(x = 'Mean accuracy', y = 'Frequency')
# ggplot(df[df$model != 'real data', ], aes(x = accuracy, fill = model)) +
# geom_density(alpha = 0.4) + theme_classic() + scale_fill_viridis_d() +
# ggtitle('Optimized accuracies') + labs(x = 'Mean accuracy', y = 'Frequency')
# ggplot(data = sensitivities, aes(x = thresholds, y = mean.sensitivity)) +
# geom_hline(yintercept = cutoff, col = magma(10, alpha = 0.7)[8], size = 1.6,
# lty = 3) + geom_vline(xintercept = thresh_prob, col = magma(10, alpha =
# 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10, alpha = 0.7)[2]) +
# geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y = 'Mean sensitivty', x =
# 'Probability threshold') + theme_classic() + annotate(geom = 'text', x =
# thresh_prob + 0.04, y = 0.9, label = round(thresh_prob, 2)) # calculate
# threshold at n_call_cutoff <-
# max(sensitivities$n_calls_prop[sensitivities$mean.sensitivity >= cutoff])
# ggplot(data = sensitivities, aes(x = thresholds, y = n_calls_prop)) +
# geom_hline(yintercept = n_call_cutoff, col = magma(10, alpha = 0.7)[8], size =
# 1.6, lty = 3) + geom_vline(xintercept = thresh_prob, col = magma(10, alpha =
# 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10, alpha = 0.7)[2]) +
# geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y = 'Proportion of calls
# used', x = 'Probability threshold') + theme_classic() + annotate(geom = 'text',
# x = n_call_cutoff + 0.04, y = 0.9, label = round(n_call_cutoff, 2)) ggplot(data
# = sensitivities, aes(x = mean.sensitivity, y = n_calls_prop)) +
# geom_hline(yintercept = n_call_cutoff, col = magma(10, alpha = 0.7)[8], size =
# 1.6, lty = 3) + geom_vline(xintercept = cutoff, col = magma(10, alpha =
# 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10, alpha = 0.7)[2]) +
# geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y = 'Proportion of calls
# used', x = 'Sensitivity') + theme_classic() + annotate(geom = 'text', y =
# n_call_cutoff + 0.04, x = cutoff, label = round(n_call_cutoff, 2))
# remove group 42 AND 6
avg_mods <- avg_mods[!names(avg_mods) %in% c("42", "6", "21", "29")]
# calculate sensitivities
sensitivities <- sensitivity_fun(X = avg_mods, parameters = max_prop_calls$data_set[1])
# calculate threshold at cutoff
thresh_prob <- min(sensitivities$thresholds[sensitivities$mean.sensitivity >= cutoff])
avg_accuracy <- sapply(avg_mods, "[[", which(names(avg_mods[[1]]) == "accuracy"))
null_accuracy <- sapply(avg_mods, "[[", which(names(avg_mods[[1]]) == "null_accuracy"))
filtered_accuracy <- unlist(sensitivities[sensitivities$thresholds == thresh_prob,
1:length(null_accuracy)])
df <- data.frame(group = gsub("X", "", names(filtered_accuracy)), model = rep(c("real data",
"null model", "filtered model"), each = length(avg_accuracy)), accuracy = c(avg_accuracy,
null_accuracy, filtered_accuracy))
df$model <- factor(df$model, levels = c("null model", "filtered model", "real data"))
# violin plots
ggplot(df[df$model != "filtered model", ], aes(y = accuracy, x = model, fill = model)) +
geom_violin() + theme_classic() + scale_fill_viridis_d(alpha = 0.4) + ggtitle("Original accuracies") +
labs(y = "Mean accuracy", x = "Model") + geom_point(aes(x = rep(c(2, 1), each = nrow(df)/3)),
size = 5, show.legend = FALSE, col = "gray43", alpha = 0.8) + geom_line(aes(x = rep(c(2,
1), each = nrow(df)/3), group = group), col = "gray43", alpha = 0.8)
ggplot(df[df$model != "real data", ], aes(y = accuracy, x = model, fill = model)) +
geom_violin() + theme_classic() + scale_fill_viridis_d(alpha = 0.4) + ggtitle("Optimized accuracies") +
labs(y = "Mean accuracy", x = "Model") + geom_point(aes(x = rep(c(1, 2), each = nrow(df)/3)),
size = 3, show.legend = FALSE, col = "gray43", alpha = 0.8) + geom_line(aes(x = rep(c(1,
2), each = nrow(df)/3), group = group), col = "gray43", alpha = 0.8)
# density plots
ggplot(df[df$model != "filtered model", ], aes(x = accuracy, fill = model)) + geom_density(alpha = 0.4) +
theme_classic() + scale_fill_viridis_d() + ggtitle("Original (non-filtered) accuracies") +
labs(x = "Mean accuracy", y = "Frequency")
ggplot(df[df$model != "real data", ], aes(x = accuracy, fill = model)) + geom_density(alpha = 0.4) +
theme_classic() + scale_fill_viridis_d() + ggtitle("Optimized accuracies") +
labs(x = "Mean accuracy", y = "Frequency")
ggplot(data = sensitivities, aes(x = thresholds, y = mean.sensitivity)) + geom_hline(yintercept = cutoff,
col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_vline(xintercept = thresh_prob,
col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10,
alpha = 0.7)[2]) + geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y = "Mean sensitivty",
x = "Probability threshold") + theme_classic() + annotate(geom = "text", x = thresh_prob +
0.04, y = 0.9, label = round(thresh_prob, 2))
# calculate threshold at
n_call_cutoff <- max(sensitivities$n_calls_prop[sensitivities$mean.sensitivity >=
cutoff])
ggplot(data = sensitivities, aes(x = thresholds, y = n_calls_prop)) + geom_hline(yintercept = n_call_cutoff,
col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_vline(xintercept = thresh_prob,
col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10,
alpha = 0.7)[2]) + geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y = "Proportion of calls used",
x = "Probability threshold") + theme_classic() + annotate(geom = "text", x = n_call_cutoff +
0.04, y = 0.9, label = round(n_call_cutoff, 2))
ggplot(data = sensitivities, aes(x = mean.sensitivity, y = n_calls_prop)) + geom_hline(yintercept = n_call_cutoff,
col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_vline(xintercept = cutoff,
col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10,
alpha = 0.7)[2]) + geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y = "Proportion of calls used",
x = "Sensitivity") + theme_classic() + annotate(geom = "text", y = n_call_cutoff +
0.04, x = cutoff, label = round(n_call_cutoff, 2))
# read acoustic parameter data
acous_param_l <- readRDS("./data/processed/acoustic_parameters_all_groups_all_warbler_acoustic_measurements.RDS")
acous_param_l <- acous_param_l[names(acous_param_l) %in% names(avg_mods)]
# extract random forests from acous_param_l list
random_forests_l <- lapply(avg_mods, "[[", which(names(avg_mods[[1]]) == "random_forests"))
agg_preds_l <- lapply(names(random_forests_l), function(x) {
# print(x)
Z <- acous_param_l[[x]]
Y <- Z[Z$experiment == "vuelo grupal/sin sonido", ]
Y$indiv <- NULL
# random forest models for this group
rfms <- random_forests_l[[x]]
# predict using all random forest models
rf_preds <- lapply(rfms, function(x) predict(object = x, data = Y)$predictions)
# add column (individual) if not found
rf_preds <- lapply(rf_preds, function(x) {
if (ncol(x) < length(unique(Z$indiv[Z$experiment == "vuelo solo"]))) {
# how many columns are missing
mssng <- length(unique(Z$indiv[Z$experiment == "vuelo solo"])) - ncol(x)
# add missing columns with 0
for (i in 1:(mssng)) x <- data.frame(x, 0, check.names = FALSE)
names(x)[(ncol(x) - mssng + 1):ncol(x)] <- setdiff(unique(Z$indiv[Z$experiment ==
"vuelo solo"]), names(x))
}
return(x)
})
# get together predictions from the same individual
preds_by_indv <- lapply(1:ncol(rf_preds[[1]]), function(y) do.call(cbind, lapply(rf_preds,
function(e) e[, y])))
agg_preds <- as.data.frame(lapply(preds_by_indv, rowMeans, na.rm = TRUE))
# add individual name to columns
names(agg_preds) <- colnames(rf_preds[[1]])
# add sound file column
agg_preds$sound.files <- Y$sound.files
# get predicted indiv from aggregated probabilities
agg_preds$pred_indiv <- apply(agg_preds[, sapply(agg_preds, is.numeric)], 1,
function(x) colnames(agg_preds)[which.max(x)])
agg_preds$group <- x
agg_preds$max_prob <- apply(agg_preds[, sapply(agg_preds, is.numeric)], 1, max)
return(agg_preds)
})
# add group name to list
names(agg_preds_l) <- names(acous_param_l)
agg_pred <- do.call(rbind, lapply(agg_preds_l, function(x) x[, c("group", "sound.files",
"max_prob", "pred_indiv")]))
# remove those groups with low sensitivity
agg_pred <- agg_pred[!agg_pred$group %in% c("42", "6"), ]
# save as RDS
saveRDS(agg_pred, "./data/processed/predicted_individual_in_group_flights.RDS")
# read as RDS
agg_pred <- readRDS("./data/processed/predicted_individual_in_group_flights.RDS")
# get summary by group
summ_by_groups <- lapply(unique(agg_pred$group), function(x) {
Y <- agg_pred[agg_pred$group == x, ]
# total number of calls used
n_used_calls <- sum(Y$max_prob > thresh_prob)
# proportion of calls used
prop_used_calls <- sum(Y$max_prob > thresh_prob)/nrow(Y)
return(data.frame(group = x, n_used_calls, total_calls = nrow(Y), prop_used_calls))
})
summ_by_group <- do.call(rbind, summ_by_groups)
# add row with total across all groups
total_row <- data.frame(group = "All groups", n_used_calls = sum(summ_by_group$n_used_calls),
total_calls = sum(summ_by_group$total_calls))
total_row$prop_used_calls <- total_row$n_used_calls/total_row$total_calls
# bind totals to summary table
summ_by_group <- rbind(summ_by_group, total_row)
# print pretty table
df1 <- knitr::kable(summ_by_group, row.names = TRUE, escape = FALSE, format = "html",
digits = 2)
df1 <- kable_styling(df1, bootstrap_options = c("striped", "hover", "condensed",
"responsive"), full_width = FALSE, font_size = 18)
row_spec(df1, row = nrow(summ_by_group), bold = TRUE, color = "white", background = viridis(10)[8])
| group | n_used_calls | total_calls | prop_used_calls | |
|---|---|---|---|---|
| 1 | 4 | 69 | 69 | 1 |
| 2 | 3 | 176 | 176 | 1 |
| 3 | 1 | 46 | 46 | 1 |
| 4 | 40 | 3 | 3 | 1 |
| 5 | 9 | 39 | 39 | 1 |
| 6 | 15 | 59 | 59 | 1 |
| 7 | 13 | 5 | 5 | 1 |
| 8 | 18-A-B | 20 | 20 | 1 |
| 9 | 6/? | 72 | 72 | 1 |
| 10 | 12/12/N/N | 15 | 15 | 1 |
| 11 | 12/NN | 58 | 58 | 1 |
| 12 | All groups | 562 | 562 | 1 |
Using all calls, including those with low class probabilities
# read acoustic parameter data
acous_param_l <- readRDS("./data/processed/acoustic_parameters_all_groups_all_warbler_acoustic_measurements.RDS")
# exclude groups with low sensitivity (42 & 6) or low sample size after removing
# low prob calls (29)
acous_param_l <- acous_param_l[!names(acous_param_l) %in% c("42", "6", "21", "29")]
# get group and solo call counts per individual
sl_vs_grps <- lapply(names(acous_param_l), function(x) {
acous_param <- acous_param_l[[x]]
group_file <- acous_param$sound.files[acous_param$experiment == "vuelo grupal/sin sonido"][1]
# get sound file number for group flights
group_file <- strsplit(group_file, ".wav")[[1]][1]
group_file_num <- as.numeric(substr(group_file, start = nchar(group_file) - 6,
nchar(group_file)))
# get data for solo flights
solo <- acous_param[acous_param$experiment == "vuelo solo", ]
sl_grp_cll <- lapply(unique(solo$indiv), function(y) {
solo_calls <- sum(solo$indiv == y)
group_calls <- sum(agg_pred$group == x & agg_pred$pred_indiv == y)
# get sound file number for group flights
solo_file <- solo$sound.files[solo$indiv == y][1]
solo_file <- strsplit(solo_file, ".wav")[[1]][1]
solo_file_num <- as.numeric(substr(solo_file, start = nchar(solo_file) -
6, nchar(solo_file)))
return(data.frame(group = x, indiv = y, experiment = c("solo", "group"),
calls = c(solo_calls, group_calls), prop_calls = c(solo_calls/nrow(solo),
group_calls/sum(agg_pred$group == x)), sound_file_number = c(solo_file_num,
group_file_num)))
})
sl_grp_clls <- do.call(rbind, sl_grp_cll)
return(sl_grp_clls)
})
sl_vs_grp <- do.call(rbind, sl_vs_grps)
# remove 21 due to no group calls left
sl_vs_grp <- sl_vs_grp[sl_vs_grp$group != "21", ]
# add metadata
caps <- as.data.frame(read_excel("./data/raw/Proyecto MPI enero 2020_3.xlsx", sheet = "Capturas"))
exps_mtdt <- as.data.frame(read_excel("./data/raw/Proyecto MPI enero 2020_3.xlsx",
sheet = "Experimento video coor vuelo"))
# time in seconds
exps_mtdt$flight_time <- round(as.numeric(exps_mtdt$`Tiempo de vuelo aprox`) * 60/0.04166667)
# remove NA in audio
exps_mtdt <- exps_mtdt[!is.na(exps_mtdt$Audio), ]
## Add flight time for solo and group flight
sl_vs_grp$flight_time <- sapply(1:nrow(sl_vs_grp), function(x) {
out <- if (sl_vs_grp$experiment[x] == "solo")
unique(na.omit(exps_mtdt$flight_time[exps_mtdt$Individuo == sl_vs_grp$indiv[x] &
exps_mtdt$Experimento == "vuelo solo" & !is.na(exps_mtdt$Individuo)])) else unique(na.omit(exps_mtdt$flight_time[exps_mtdt$Grupo == sl_vs_grp$group[x] &
exps_mtdt$Experimento == "vuelo grupal/sin sonido"]))
return(out)
})
# flight time
sl_vs_grp$flight_time <- sapply(1:nrow(sl_vs_grp), function(x) {
ft <- unique(na.omit(exps_mtdt$flight_time[which(as.numeric(exps_mtdt$Audio) ==
sl_vs_grp$sound_file_number[x])]))
if (length(ft) == 0)
ft <- NA
return(ft)
})
# add sex
sl_vs_grp$sex <- sapply(sl_vs_grp$indiv, function(x) na.exclude(caps$Sexo[caps$Murci ==
x])[1], USE.NAMES = FALSE)
sl_vs_grp$sex <- ifelse(sl_vs_grp$sex == "m", "Male", "Female")
# add age
sl_vs_grp$age <- sapply(sl_vs_grp$indiv, function(x) na.exclude(caps$Edad[caps$Murci ==
x])[1], USE.NAMES = FALSE)
sl_vs_grp$age <- ifelse(sl_vs_grp$age == "sa", "Sub-adult", "Adult")
sl_vs_grp$reprod.stg <- sapply(sl_vs_grp$indiv, function(x) na.exclude(caps$`Estado reproductivo`[caps$Murci ==
x])[1], USE.NAMES = FALSE)
sl_vs_grp$reprod.stg[sl_vs_grp$reprod.stg == "p?"] <- "Pregnant"
sl_vs_grp$reprod.stg[sl_vs_grp$reprod.stg == "ne"] <- "Inactive"
# order levels
sl_vs_grp$experiment <- factor(sl_vs_grp$experiment, levels = c("solo", "group"))
sl_vs_grp$call_rate <- sl_vs_grp$calls/(sl_vs_grp$flight_time/60)
# calling rate solo vs group flights ggplot(sl_vs_grp, aes(y = call_rate, x =
# experiment, col = indiv, shape = sex)) + geom_hline(yintercept =
# mean(sl_vs_grp$call_rate[sl_vs_grp$experiment == 'group']), lty = 2, alpha =
# 0.5) + geom_hline(yintercept = mean(sl_vs_grp$call_rate[sl_vs_grp$experiment ==
# 'solo'], na.rm = TRUE), lty = 2, alpha = 0.5) + geom_point(size = 5, alpha =
# 0.8) + theme_classic() + scale_color_viridis_d(alpha = 0.4, guide=FALSE) +
# ggtitle('Change in calling rate by sex and individual') + labs(x =
# 'Experiment', y = 'Calling rate (calls / min)') + geom_line(aes(group = indiv),
# col = 'gray43', alpha = 0.8) + facet_wrap(~ group, nrow = 4)
# Using only calls with class probabilities higher than `r thresh_prob`
# get group and solo call counts per individual
sl_vs_grps <- lapply(names(acous_param_l), function(x) {
acous_param <- acous_param_l[[x]]
group_file <- acous_param$sound.files[acous_param$experiment == "vuelo grupal/sin sonido"][1]
# get sound file number for group flights
group_file <- strsplit(group_file, ".wav")[[1]][1]
group_file_num <- as.numeric(substr(group_file, start = nchar(group_file) - 6,
nchar(group_file)))
# get data for solo flights
solo <- acous_param[acous_param$experiment == "vuelo solo", ]
sl_grp_cll <- lapply(unique(solo$indiv), function(y) {
solo_calls <- sum(solo$indiv == y)
# taking into acount those with class prob higher than
group_calls <- sum(agg_pred$group == x & agg_pred$pred_indiv == y & agg_pred$max_prob >
thresh_prob)
# get sound file number for group flights
solo_file <- solo$sound.files[solo$indiv == y][1]
solo_file <- strsplit(solo_file, ".wav")[[1]][1]
solo_file_num <- as.numeric(substr(solo_file, start = nchar(solo_file) -
6, nchar(solo_file)))
return(data.frame(group = x, indiv = y, experiment = c("solo", "group"),
calls = c(solo_calls, group_calls), prop_calls = c(solo_calls/nrow(solo),
group_calls/sum(agg_pred$group == x)), sound_file_number = c(solo_file_num,
group_file_num)))
})
sl_grp_clls <- do.call(rbind, sl_grp_cll)
return(sl_grp_clls)
})
sl_vs_grp <- do.call(rbind, sl_vs_grps)
# remove 21 due to no group calls left
sl_vs_grp <- sl_vs_grp[sl_vs_grp$group != "21", ]
# add metadata
caps <- as.data.frame(read_excel("./data/raw/Proyecto MPI enero 2020_3.xlsx", sheet = "Capturas"))
exps_mtdt <- as.data.frame(read_excel("./data/raw/Proyecto MPI enero 2020_3.xlsx",
sheet = "Experimento video coor vuelo"))
# time in seconds
exps_mtdt$flight_time <- round(as.numeric(exps_mtdt$`Tiempo de vuelo aprox`) * 60/0.04166667)
# remove NA in audio
exps_mtdt <- exps_mtdt[!is.na(exps_mtdt$Audio), ]
## Add flight time for solo and group flight
sl_vs_grp$flight_time <- sapply(1:nrow(sl_vs_grp), function(x) {
out <- if (sl_vs_grp$experiment[x] == "solo")
unique(na.omit(exps_mtdt$flight_time[exps_mtdt$Individuo == sl_vs_grp$indiv[x] &
exps_mtdt$Experimento == "vuelo solo" & !is.na(exps_mtdt$Individuo)])) else unique(na.omit(exps_mtdt$flight_time[exps_mtdt$Grupo == sl_vs_grp$group[x] &
exps_mtdt$Experimento == "vuelo grupal/sin sonido"]))
return(out)
})
# flight time
sl_vs_grp$flight_time <- sapply(1:nrow(sl_vs_grp), function(x) {
ft <- unique(na.omit(exps_mtdt$flight_time[which(as.numeric(exps_mtdt$Audio) ==
sl_vs_grp$sound_file_number[x])]))
if (length(ft) == 0)
ft <- NA
return(ft)
})
# add sex
sl_vs_grp$sex <- sapply(sl_vs_grp$indiv, function(x) na.exclude(caps$Sexo[caps$Murci ==
x])[1], USE.NAMES = FALSE)
sl_vs_grp$sex <- ifelse(sl_vs_grp$sex == "m", "Male", "Female")
# add age
sl_vs_grp$age <- sapply(sl_vs_grp$indiv, function(x) na.exclude(caps$Edad[caps$Murci ==
x])[1], USE.NAMES = FALSE)
sl_vs_grp$age <- ifelse(sl_vs_grp$age == "sa", "Sub-adult", "Adult")
sl_vs_grp$reprod.stg <- sapply(sl_vs_grp$indiv, function(x) na.exclude(caps$`Estado reproductivo`[caps$Murci ==
x])[1], USE.NAMES = FALSE)
sl_vs_grp$reprod.stg[sl_vs_grp$reprod.stg == "p?"] <- "Pregnant"
sl_vs_grp$reprod.stg[sl_vs_grp$reprod.stg == "ne"] <- "Inactive"
# order levels
sl_vs_grp$experiment <- factor(sl_vs_grp$experiment, levels = c("solo", "group"))
sl_vs_grp$call_rate <- sl_vs_grp$calls/(sl_vs_grp$flight_time/60)
# calling rate solo vs group flights ggplot(sl_vs_grp, aes(y = call_rate, x =
# experiment, col = indiv, shape = sex)) + geom_hline(yintercept =
# mean(sl_vs_grp$call_rate[sl_vs_grp$experiment == 'group']), lty = 2, alpha =
# 0.5) + geom_hline(yintercept = mean(sl_vs_grp$call_rate[sl_vs_grp$experiment ==
# 'solo'], na.rm = TRUE), lty = 2, alpha = 0.5) + geom_point(size = 5, alpha =
# 0.8) + theme_classic() + scale_color_viridis_d(alpha = 0.4, guide=FALSE) +
# ggtitle('Change in calling rate by sex and individual') + labs(x =
# 'Experiment', y = 'Calling rate (calls / min)') + geom_line(aes(group = indiv),
# col = 'gray43', alpha = 0.8) + facet_wrap(~ group, nrow = 4)
# violin plots ggplot(sl_vs_grp, aes(y = prop_calls, x = experiment, col = indiv,
# shape = sex)) + geom_point(size = 5, alpha = 0.8) + theme_classic() +
# scale_color_viridis_d(alpha = 0.4, guide=FALSE) + ggtitle('Original
# accuracies') + labs(y = 'Mean accuracy', x = 'Model') + geom_line(aes(group =
# indiv), col = 'gray43', alpha = 0.8) + facet_wrap(~ group, nrow = 4) +
# ggtitle(label = 'Change in calling activity by sex and individual')
# Using only calls with class probabilities higher than `r thresh_prob` and
# assinging low probability calls evenly across individuals (as all calls are
# above the threshold then all calls are being used)
# get group and solo call counts per individual
sl_vs_grps <- lapply(names(acous_param_l), function(x) {
acous_param <- acous_param_l[[x]]
group_file <- acous_param$sound.files[acous_param$experiment == "vuelo grupal/sin sonido"][1]
# get sound file number for group flights
group_file <- strsplit(group_file, ".wav")[[1]][1]
group_file_num <- as.numeric(substr(group_file, start = nchar(group_file) - 6,
nchar(group_file)))
# get data for solo flights
solo <- acous_param[acous_param$experiment == "vuelo solo", ]
sl_grp_cll <- lapply(unique(solo$indiv), function(y) {
solo_calls <- sum(solo$indiv == y)
# taking into acount those with class prob higher than
group_calls <- sum(agg_pred$group == x & agg_pred$pred_indiv == y & agg_pred$max_prob >
thresh_prob)
# add low probability calls to group calls
group_calls <- group_calls + sum(agg_pred$group == x & agg_pred$max_prob <=
thresh_prob)/length(unique(agg_pred$pred_indiv[agg_pred$group == x]))
# get sound file number for group flights
solo_file <- solo$sound.files[solo$indiv == y][1]
solo_file <- strsplit(solo_file, ".wav")[[1]][1]
solo_file_num <- as.numeric(substr(solo_file, start = nchar(solo_file) -
6, nchar(solo_file)))
return(data.frame(group = x, indiv = y, experiment = c("solo", "group"),
calls = c(solo_calls, group_calls), prop_calls = c(solo_calls/nrow(solo),
group_calls/sum(agg_pred$group == x)), sound_file_number = c(solo_file_num,
group_file_num), sound_files = c(solo_file, group_file)))
})
sl_grp_clls <- do.call(rbind, sl_grp_cll)
return(sl_grp_clls)
})
sl_vs_grp <- do.call(rbind, sl_vs_grps)
# remove 21 due to no group calls left
sl_vs_grp <- sl_vs_grp[sl_vs_grp$group != "21", ]
# add metadata
caps <- as.data.frame(read_excel("./data/raw/Proyecto MPI enero 2020_3.xlsx", sheet = "Capturas"))
exps_mtdt <- as.data.frame(read_excel("./data/raw/Proyecto MPI enero 2020_3.xlsx",
sheet = "Experimento video coor vuelo"))
# time in seconds
exps_mtdt$flight_time <- round(as.numeric(exps_mtdt$`Tiempo de vuelo aprox`) * 60/0.04166667)
# remove NA in audio
exps_mtdt <- exps_mtdt[!is.na(exps_mtdt$Audio), ]
## Add flight time for solo and group flight
sl_vs_grp$flight_time <- sapply(1:nrow(sl_vs_grp), function(x) {
out <- if (sl_vs_grp$experiment[x] == "solo")
unique(na.omit(exps_mtdt$flight_time[exps_mtdt$Individuo == sl_vs_grp$indiv[x] &
exps_mtdt$Experimento == "vuelo solo" & !is.na(exps_mtdt$Individuo)])) else unique(na.omit(exps_mtdt$flight_time[exps_mtdt$Grupo == sl_vs_grp$group[x] &
exps_mtdt$Experimento == "vuelo grupal/sin sonido"]))
return(out)
})
# flight time
sl_vs_grp$flight_time <- sapply(1:nrow(sl_vs_grp), function(x) {
ft <- unique(na.omit(exps_mtdt$flight_time[which(as.numeric(exps_mtdt$Audio) ==
sl_vs_grp$sound_file_number[x])]))
if (length(ft) == 0)
ft <- NA
return(ft)
})
# add sex
sl_vs_grp$sex <- sapply(sl_vs_grp$indiv, function(x) na.exclude(caps$Sexo[caps$Murci ==
x])[1], USE.NAMES = FALSE)
sl_vs_grp$sex <- ifelse(sl_vs_grp$sex == "m", "Male", "Female")
# add age
sl_vs_grp$age <- sapply(sl_vs_grp$indiv, function(x) na.exclude(caps$Edad[caps$Murci ==
x])[1], USE.NAMES = FALSE)
sl_vs_grp$age <- ifelse(sl_vs_grp$age == "sa", "Sub-adult", "Adult")
sl_vs_grp$reprod.stg <- sapply(sl_vs_grp$indiv, function(x) na.exclude(caps$`Estado reproductivo`[caps$Murci ==
x])[1], USE.NAMES = FALSE)
sl_vs_grp$reprod.stg[sl_vs_grp$reprod.stg == "p?"] <- "Pregnant"
sl_vs_grp$reprod.stg[sl_vs_grp$reprod.stg == "ne"] <- "Inactive"
# order levels
sl_vs_grp$experiment <- factor(sl_vs_grp$experiment, levels = c("solo", "group"))
sl_vs_grp$call_rate <- sl_vs_grp$calls/(sl_vs_grp$flight_time/60)
# calling rate solo vs group flights
ggplot(sl_vs_grp[sl_vs_grp$group != "22", ], aes(y = call_rate, x = experiment, col = indiv,
shape = sex)) + geom_hline(yintercept = mean(sl_vs_grp$call_rate[sl_vs_grp$experiment ==
"group"]), lty = 2, alpha = 0.5) + geom_hline(yintercept = mean(sl_vs_grp$call_rate[sl_vs_grp$experiment ==
"solo"], na.rm = TRUE), lty = 2, alpha = 0.5) + geom_point(size = 5, alpha = 0.8) +
theme_classic() + scale_color_viridis_d(alpha = 0.4, guide = FALSE) + ggtitle("Change in calling rate by sex and individual") +
labs(x = "Experiment", y = "Calling rate (calls / min)") + geom_line(aes(group = indiv),
col = "gray43", alpha = 0.8) + facet_wrap(~group, nrow = 4)
clls <- readRDS("./output/CALLS_fixed_detections_inquiry_calls.RDS")
sl_vs_grp$sound_files <- paste0(sl_vs_grp$sound_files, ".wav")
seltab <- attributes(clls)$check.results
gap_l <- lapply(1:nrow(sl_vs_grp), function(x) {
Y <- seltab[seltab$orig.sound.files == sl_vs_grp$sound_files[x], ]
Y <- Y[order(Y$orig.start), ]
gaps <- Y$orig.start[-1] - Y$orig.end[-nrow(Y)]
return(data.frame(group = sl_vs_grp$group[x], sound.files = sl_vs_grp$sound_files[x],
experiment = sl_vs_grp$experiment[x], gaps = ifelse(length(gaps) == 0, NA,
gaps)))
})
gaps <- do.call(rbind, gap_l)
# ggplot(gaps, aes(x = gaps, fill = experiment, group = experiment)) +
# geom_density() + scale_fill_viridis_d(alpha = 0.4) + ggtitle('Gaps sec by
# group') + theme_classic() + facet_wrap(~group, ncol = 4, scales = 'free_y')
ggplot(gaps, aes(x = gaps, fill = experiment, group = experiment)) + geom_density() +
ggtitle("Gaps sec groups combined") + scale_fill_viridis_d(alpha = 0.4) + theme_classic()
### Number of calls per individual by group
agg_pred <- readRDS("./data/processed/predicted_individual_in_group_flights.RDS")
# remove repeated
agg_pred <- agg_pred[-c(200, 156, 30, 9, 160, 435, 432), ]
agg_pred$abbr_indiv <- as.numeric(as.factor(agg_pred$pred_indiv))
agg_pred$indiv <- as.factor(agg_pred$abbr_indiv)
agg_indiv <- aggregate(abbr_indiv ~ indiv + group, data = agg_pred, FUN = length)
agg_indiv$indiv <- as.character(agg_indiv$indiv)
agg_indiv <- agg_indiv[order(agg_indiv$group, agg_indiv$abbr_indiv), ]
agg_indiv$indiv <- as.factor(agg_indiv$indiv)
ggplot(agg_indiv, aes(y = abbr_indiv, x = indiv, fill = indiv)) + geom_bar(stat = "identity") +
ggtitle("Total calls per individual in group flight") + scale_fill_viridis_d(alpha = 0.7) +
facet_wrap(~group, scales = "free", nrow = 4) + guides(fill = FALSE) + theme_classic()
# read as RDS
agg_pred <- readRDS("./data/processed/predicted_individual_in_group_flights.RDS")
agg_pred$orig.sound.files <- paste0(sapply(agg_pred$sound.files, function(x) strsplit(x,
".wav")[[1]][1]), ".wav")
agg_pred$start <- sapply(agg_pred$sound.files, function(x) seltab$orig.start[seltab$sound.files ==
x])
agg_pred$end <- sapply(agg_pred$sound.files, function(x) seltab$orig.end[seltab$sound.files ==
x])
agg_pred$middle.time <- (agg_pred$start + agg_pred$end)/2
agg_pred$abbr_indiv <- as.numeric(as.factor(agg_pred$pred_indiv))
agg_pred$group2 <- make.names(agg_pred$group)
agg_pred <- agg_pred[!duplicated(agg_pred[, c("group", "abbr_indiv", "start", "end")]),
]
agg_pred$row <- 1:nrow(agg_pred)
# this are repeated agg_pred[c(200, 206, 156, 219, 30, 31, 9, 60, 160, 225, 486,
# 435, 438, 432), ]
agg_pred <- agg_pred[-c(200, 156, 30, 9, 160, 435, 432), ]
out <- lapply(unique(agg_pred$group2), function(x) {
print(x)
Y <- agg_pred[agg_pred$group2 == x, ]
# jpeg(filename = file.path('img/group_call_through_time', paste0(x, '.jpeg')),
# width = 480 * 3, height = 480 * 2, res = 200)
par(mar = c(1, 0, 0, 0), mfrow = c(4, 1))
Y$middle.time <- Y$middle.time - min(Y$middle.time)
num.ind <- as.numeric(as.factor(Y$abbr_indiv))
num.ind <- scale(num.ind, center = TRUE)
# sq <- seq(0, max(Y$middle.time), length.out = 5)
sq <- seq(0, 120, length.out = 5)
for (i in 1:(length(sq) - 1)) {
plot(0, 0, col = "white", xlim = c(sq[i] - 0.05, sq[i + 1] + 0.05), ylim = range(num.ind) +
c(-2, 2), bty = "u")
points(x = Y$middle.time, y = num.ind, cex = 7, col = viridis(max(as.numeric(as.factor(Y$abbr_indiv))),
alpha = 0.5)[as.numeric(as.factor(Y$abbr_indiv))], pch = 20)
# text(x = Y$middle.time, y = num.ind + rnorm(nrow(Y), sd = 0.8), Y$row, cex =
# 0.6, col = 'black')
}
# dev.off()
})
## [1] "X4"
## [1] "X3"
## [1] "X1"
## [1] "X40"
## [1] "X9"
## [1] "X15"
## [1] "X13"
## [1] "X18.A.B"
## [1] "X6.."
## [1] "X12.12.N.N"
## [1] "X12.NN"
lapply(unique(agg_pred$group), function(x) {
Y <- agg_pred[agg_pred$group == x, ]
mc <- markov.test(Y$abbr_indiv, type = "rank")
return(mc)
})
seq <- simulateMarkovChain(5000, matrix(0.25, 4, 4), states = c("a", "c", "g", "t"))
markov.test(seq)
rnd.seq <- sample(seq)
markov.test(rnd.seq)
sequenceMatr <- createSequenceMatrix(sequence, sanitize = FALSE)
mcFitMLE <- markovchainFit(data = sequence)
rnd.seq <- sample(sequence)
rnd <- createSequenceMatrix(rnd.seq, sanitize = FALSE)
mcFitMLE2 <- markovchainFit(data = rnd.seq)
mcFitBSP <- markovchainFit(data = sequence, method = "bootstrap", nboot = 5, name = "Bootstrap Mc")
sim.sq <- rep(c("a", "b", "c", "d"), 100)
verifyMarkovProperty(sim.sq)
sim.sq2 <- sample(c("a", "b", "c", "d"), size = 100, replace = TRUE)
verifyMarkovProperty(sim.sq2)
weatherStates <- c("sunny", "cloudy", "rain")
byRow <- TRUE
weatherMatrix <- matrix(data = c(0.7, 0.2, 0.1, 0.3, 0.4, 0.3, 0.2, 0.45, 0.35),
byrow = byRow, nrow = 3, dimnames = list(weatherStates, weatherStates))
mcWeather <- new("markovchain", states = weatherStates, byrow = byRow, transitionMatrix = weatherMatrix,
name = "Weather")
mcIgraph <- as(mcWeather, "igraph")
plot(mcIgraph)
Session information
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=pt_BR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=es_CR.UTF-8 LC_COLLATE=pt_BR.UTF-8
## [5] LC_MONETARY=es_CR.UTF-8 LC_MESSAGES=pt_BR.UTF-8
## [7] LC_PAPER=es_CR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] spgs_1.0-3 TraMineR_2.2-1 igraph_1.2.6 markovchain_0.8.6
## [5] soundgen_2.0.0 shinyBS_0.61 Sim.DiffProc_4.8 rlang_0.4.11
## [9] kableExtra_1.3.4 viridis_0.6.1 viridisLite_0.4.0 pbapply_1.4-3
## [13] e1071_1.7-6 caret_6.0-88 ggplot2_3.3.3 lattice_0.20-44
## [17] ranger_0.12.1 readxl_1.3.1 warbleR_1.1.27 NatureSounds_1.0.4
## [21] knitr_1.33 seewave_2.1.6 tuneR_1.3.3 devtools_2.4.1
## [25] usethis_2.0.1
##
## loaded via a namespace (and not attached):
## [1] backports_1.2.1 Hmisc_4.5-0 systemfonts_1.0.1
## [4] plyr_1.8.6 splines_4.0.5 digest_0.6.27
## [7] foreach_1.5.1 htmltools_0.5.1.1 fansi_0.4.2
## [10] checkmate_2.0.0 magrittr_2.0.1 memoise_2.0.0
## [13] cluster_2.1.2 remotes_2.3.0 recipes_0.1.16
## [16] gower_0.2.2 RcppParallel_5.1.4 svglite_2.0.0
## [19] prettyunits_1.1.1 jpeg_0.1-8.1 colorspace_2.0-1
## [22] signal_0.7-6 rvest_1.0.0 xfun_0.23
## [25] dplyr_1.0.5 callr_3.7.0 crayon_1.4.1
## [28] RCurl_1.98-1.3 jsonlite_1.7.2 survival_3.2-11
## [31] iterators_1.0.13 glue_1.4.2 gtable_0.3.0
## [34] ipred_0.9-11 webshot_0.5.2 pkgbuild_1.2.0
## [37] scales_1.1.1 Rcpp_1.0.6 dtw_1.22-3
## [40] htmlTable_2.2.1 xtable_1.8-4 foreign_0.8-81
## [43] matlab_1.0.2 proxy_0.4-25 Formula_1.2-4
## [46] stats4_4.0.5 lava_1.6.9 prodlim_2019.11.13
## [49] htmlwidgets_1.5.3 httr_1.4.2 RColorBrewer_1.1-2
## [52] ellipsis_0.3.2 farver_2.1.0 pkgconfig_2.0.3
## [55] nnet_7.3-16 sass_0.3.1 utf8_1.2.1
## [58] labeling_0.4.2 tidyselect_1.1.1 reshape2_1.4.4
## [61] later_1.2.0 munsell_0.5.0 cellranger_1.1.0
## [64] tools_4.0.5 cachem_1.0.5 cli_2.5.0
## [67] generics_0.1.0 evaluate_0.14 stringr_1.4.0
## [70] fastmap_1.1.0 yaml_2.2.1 ModelMetrics_1.2.2.2
## [73] processx_3.5.2 fs_1.5.0 purrr_0.3.4
## [76] nlme_3.1-152 mime_0.10 formatR_1.9
## [79] xml2_1.3.2 compiler_4.0.5 rstudioapi_0.13
## [82] png_0.1-7 testthat_3.0.2 tibble_3.1.2
## [85] bslib_0.2.4 stringi_1.6.2 highr_0.9
## [88] ps_1.6.0 desc_1.3.0 Matrix_1.3-3
## [91] fftw_1.0-6 vctrs_0.3.8 pillar_1.6.1
## [94] lifecycle_1.0.0 jquerylib_0.1.4 data.table_1.14.0
## [97] bitops_1.0-7 httpuv_1.6.1 R6_2.5.0
## [100] latticeExtra_0.6-29 promises_1.2.0.1 gridExtra_2.3
## [103] sessioninfo_1.1.1 codetools_0.2-18 boot_1.3-28
## [106] MASS_7.3-54 pkgload_1.2.1 rprojroot_2.0.2
## [109] rjson_0.2.20 withr_2.4.2 Deriv_4.1.3
## [112] expm_0.999-6 parallel_4.0.5 grid_4.0.5
## [115] rpart_4.1-15 timeDate_3043.102 class_7.3-19
## [118] rmarkdown_2.8 pROC_1.17.0.1 shiny_1.6.0
## [121] lubridate_1.7.10 base64enc_0.1-3